home *** CD-ROM | disk | FTP | other *** search
- /* MIFwriter.c -- MIF output support for WWW
- * $Id: SGMLmain.c,v 1.3 93/01/06 18:40:27 connolly Exp Locker: connolly $
- */
-
- /* implements ... */
- #include "MIFwriter.h"
-
- /* uses ... */
- #include "SGML.h"
- #include "HTParse.h"
- #include "HTMLdtd.h"
-
- #include <stdio.h>
- #include <string.h>
-
- #include "object.h"
- #include "debug.h"
-
-
- typedef struct{
- char gi[SGML_NAMELEN + 1];
- int content;
- }Element;
-
-
- typedef struct{
- FILE* out;
- Element stack[SGML_TAGLVL];
- int literal;
- int taglvl;
- int needspace;
- int empty; /* current paragraph is empty */
-
- enum {
- MIFFile,
- VariableFormats, VariableDef,
- TextFlow, ParaLine, Font
- }state;
- }MIF;
-
/*
 * STATE(m, s, l, p) -- move writer m to output state s.
 *
 *   l  new value for m->literal
 *   p  nonzero when a new paragraph context starts: m->empty is set to 1
 *      and m->needspace is cleared; zero leaves both untouched
 *
 * The macro is a single expression; m and p are evaluated more than once,
 * so pass expressions without side effects.
 */
#define STATE(m, s, l, p) \
  ((m)->state = (s), \
   (m)->literal = (l), \
   (p) ? ((m)->empty = 1, (m)->needspace = 0) : 0)
-
-
- static HMStartTagProc start_tag;
-
- static HMEndTagProc end_tag;
-
- static HMDataProc data;
-
- static HMFileWriterProc MIFwriter_new;
-
- static HMDeleteProc MIFwriter_dt;
-
- static VOID
- marker PARAMS((MIF* m,
- CONST HMBinding *attributes,
- int nattrs));
-
-
- HMDoc_Class MIFwriter = {MIFwriter_new, 0, MIFwriter_dt,
- start_tag, end_tag, data, html_entity_text};
-
-
/*
 * ISO Latin-1 -> Frame character code table.
 *
 * Entry [c - 160] is the code written for Latin-1 character c, for
 * c = 160..255 (96 entries).  Each line gives the Frame code followed by
 * the Latin-1 code and its glyph name.  Entries of 0x00 appear to mark
 * characters for which no Frame equivalent is listed.
 */
static int FrameEncoding[] =
{
  0x20,   /* 160 space */
  0xc1,   /* 161 exclamdown */
  0xa2,   /* 162 cent */
  0xa3,   /* 163 sterling */
  0xdb,   /* 164 currency */
  0xb4,   /* 165 yen */
  0x00,   /* 166 brokenbar */
  0xa4,   /* 167 section */
  0xac,   /* 168 dieresis */
  0xa9,   /* 169 copyright */
  0xbb,   /* 170 ordfeminine */
  0xc7,   /* 171 guillemotleft */
  0xc2,   /* 172 logicalnot */
  0x2d,   /* 173 hyphen */
  0xa8,   /* 174 registered */
  0xf8,   /* 175 macron */
  0x00,   /* 176 degree */
  0x00,   /* 177 plusminus */
  0x00,   /* 178 twosuperior */
  0x00,   /* 179 threesuperior */
  0xab,   /* 180 acute */
  0x00,   /* 181 mu */
  0xa6,   /* 182 paragraph */
  0xe1,   /* 183 periodcentered */
  0xfc,   /* 184 cedilla */
  0x00,   /* 185 onesuperior */
  0xbc,   /* 186 ordmasculine */
  0xc8,   /* 187 guillemotright */
  0x00,   /* 188 onequarter */
  0x00,   /* 189 onehalf */
  0x00,   /* 190 threequarters */
  0xc0,   /* 191 questiondown */
  0xcb,   /* 192 Agrave */
  0xe7,   /* 193 Aacute */
  0xe5,   /* 194 Acircumflex */
  0xcc,   /* 195 Atilde */
  0x80,   /* 196 Adieresis */
  0x81,   /* 197 Aring */
  0xae,   /* 198 AE */
  0x82,   /* 199 Ccedilla */
  0xe9,   /* 200 Egrave */
  0x83,   /* 201 Eacute */
  0xe6,   /* 202 Ecircumflex */
  0xe8,   /* 203 Edieresis */
  0xed,   /* 204 Igrave */
  0xea,   /* 205 Iacute */
  0xeb,   /* 206 Icircumflex */
  0xec,   /* 207 Idieresis */
  0x00,   /* 208 Eth */
  0x84,   /* 209 Ntilde */
  0xf1,   /* 210 Ograve */
  0xee,   /* 211 Oacute */
  0xef,   /* 212 Ocircumflex */
  0xcd,   /* 213 Otilde */
  0x85,   /* 214 Odieresis */
  0x00,   /* 215 multiply */
  0xaf,   /* 216 Oslash */
  0xf4,   /* 217 Ugrave */
  0xf2,   /* 218 Uacute */
  0xf3,   /* 219 Ucircumflex */
  0x86,   /* 220 Udieresis */
  0x00,   /* 221 Yacute */
  0x00,   /* 222 Thorn */
  0xa7,   /* 223 germandbls */
  0x88,   /* 224 agrave */
  0x87,   /* 225 aacute */
  0x89,   /* 226 acircumflex */
  0x8b,   /* 227 atilde */
  0x8a,   /* 228 adieresis */
  0x8c,   /* 229 aring */
  0xbe,   /* 230 ae */
  0x8d,   /* 231 ccedilla */
  0x8f,   /* 232 egrave */
  0x8e,   /* 233 eacute */
  0x90,   /* 234 ecircumflex */
  0x91,   /* 235 edieresis */
  0x93,   /* 236 igrave */
  0x92,   /* 237 iacute */
  0x94,   /* 238 icircumflex */
  0x95,   /* 239 idieresis */
  0x00,   /* 240 eth */
  0x96,   /* 241 ntilde */
  0x98,   /* 242 ograve */
  0x97,   /* 243 oacute */
  0x99,   /* 244 ocircumflex */
  0x9b,   /* 245 otilde */
  0x9a,   /* 246 odieresis */
  0x00,   /* 247 divide */
  0xbf,   /* 248 oslash */
  0x9d,   /* 249 ugrave */
  0x9c,   /* 250 uacute */
  0x9e,   /* 251 ucircumflex */
  0x9f,   /* 252 udieresis */
  0x00,   /* 253 yacute */
  0x00,   /* 254 thorn */
  0xd8    /* 255 ydieresis */
};
-
-
- /* mifwriter constructor */
- static HMDoc*
- MIFwriter_new(fp)
- FILE* fp;
- {
- MIF* m = NEW(MIF, 1);
- m->out = fp;
- m->taglvl = 1;
- strcpy(m->stack[0].gi, "HTML"); /* @@ fake tag minimization */
- STATE(m, MIFFile, 0, 1);
-
- fprintf(m->out,
- "<MIFFile 3.00> # Generated by html2mif\n"
- );
- return (HMDoc*)m;
- }
-
-
- static VOID
- MIFwriter_dt(this)
- HMDoc* this;
- {
- FREE(this);
- }
-
-
-
- static VOID
- data(document, chars, nchars)
- HMDoc* document;
- CONST char* chars;
- int nchars;
- {
- MIF* m = (MIF*)document;
- Element* e = &m->stack[m->taglvl - 1];
- CONST char* p;
-
- debug(("<emptypar: %d 1st char: %d nchars: %d>\n",
- m->empty, chars[0], nchars));
-
- if(chars[0] == '\n' && nchars <2 &&
- m->literal == 0 && m->empty)
- return;
-
- switch(m->state){
- case MIFFile:
- start_tag((HMDoc*)m, "BODY", 0, 0);
- fprintf(m->out,
- " <Para\n"
- " <PgfTag `BODY'>\n"
- " <ParaLine\n"
- " <String `");
- STATE(m, ParaLine, 0, 1);
- break;
-
- case TextFlow:
- fprintf(m->out,
- " <Para\n"
- " <PgfTag `%s'>\n"
- " <ParaLine\n"
- " <String `", e->gi);
- STATE(m, ParaLine, 0, 1);
- break;
-
- case VariableFormats:
- /* in element content. Skip data */
- return;
-
- case VariableDef:
- /* nothing */
- break;
-
- default:
- fprintf(m->out,
- " <String `");
- }
-
- for(p = chars; p-chars < nchars; p++){
- if(*p != '\n')
- m->empty = 0;
-
- if(*p & 0x80){
- int i = (*p & 0xFF) - 160;
- if(i < 96) /* in ISOlat1 encoding? */
- printf("\\x%02x ", FrameEncoding[i]);
- }else
- switch(*p){
- case '\n':
- if(m->literal)
- fprintf(m->out,
- "'>\n"
- " <Char HardReturn>\n"
- " > # End ParaLine\n"
- " <ParaLine\n"
- " <String `");
- else if (m->needspace){
- fprintf(m->out, " ");
- m->needspace = 0;
- }
- break;
-
- case '\r':
- /* nothing */
- break;
-
- case '\t':
- fprintf(m->out, "\\t");
- m->needspace = 0;
- break;
-
- case '>':
- fprintf(m->out, "\\>");
- m->needspace = 1;
- break;
-
- case '\'':
- fprintf(m->out, "\\q");
- m->needspace = 1;
- break;
-
- case '`':
- fprintf(m->out, "\\Q");
- m->needspace = 1;
- break;
-
- case '\\':
- fprintf(m->out, "\\\\");
- m->needspace = 1;
- break;
-
- case ' ':
- if(m->literal){
- fprintf(m->out,
- "'>\n"
- " <Char HardSpace>\n"
- " <String `");
- }else{
- m->needspace = 0;
- fprintf(m->out, " ");
- }
- break;
-
- default:
- m->needspace = 1;
- fprintf(m->out, "%c", *p);
- }
- }
-
- fprintf(m->out, "'>\n");
- }
-
-
#if 0
/* save this for insets */

/*
 * entity -- (compiled out) write a named character as a <Char name>
 * statement and note that a following newline needs a space.
 */
static VOID
entity(document, name)
     HMDoc* document;
     CONST char* name;
{
  MIF* writer = (MIF*)document;

  /*@@ same prep work as data */
  fprintf(writer->out, " <Char %s>\n", name);
  writer->needspace = 1;
}
#endif
-
-
- static VOID
- marker(m, attributes, nattrs)
- MIF* m;
- CONST HMBinding *attributes;
- int nattrs;
- {
- int i;
- char* name = 0;
- char* href = 0;
-
- for(i = 0; i < nattrs; i++){
- if(!strcmp(attributes[i].name, "NAME"))
- name = attributes[i].value;
- else if(!strcmp(attributes[i].name, "HREF"))
- href = attributes[i].value;
- }
-
- if(href){
- char* anchor = HTParse(href, "", PARSE_ANCHOR);
- char* scheme = HTParse(href, "", PARSE_ACCESS);
- char* path = HTParse(href, "", PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
-
- fprintf(m->out,
- " <Marker\n"
- " <MType 8>\n");
- if(scheme && *scheme)
- fprintf(m->out,
- " <MText `message www %s:%s#%s'>\n",
- scheme, path, anchor);
- else if(path && path[0] && path[1]){ /*@@ in case of just "/" */
- if(anchor && *anchor)
- fprintf(m->out,
- " <MText `gotolink %s:%s'>\n",
- path, anchor);
- else
- fprintf(m->out,
- " <MText `gotolink %s:firstpage'>\n",
- path);
- }else
- fprintf(m->out,
- " <MText `gotolink %s'>\n",
- anchor);
-
- fprintf(m->out,
- " > #End of Marker\n");
-
- free(scheme);
- free(path);
- free(anchor);
- }
- else if (name){
- fprintf(m->out,
- " <Marker\n"
- " <MType 8>\n"
- " <MText `newlink %s'>\n"
- " > #End of Marker\n",
- name);
- }
- }
-
-
- static int
- start_tag(document, gi, attributes, nattrs)
- HMDoc* document;
- CONST char* gi;
- CONST HMBinding attributes[];
- int nattrs;
- {
- MIF* m = (MIF*)document;
- Element* e = &m->stack[m->taglvl++];
- int taglevel = -1;
-
- m->needspace = 0;
-
- strcpy(e->gi, gi);
- debug(("stacking '%s'\n", gi));
-
- if(!strcmp(gi, "H1") ||
- !strcmp(gi, "H2") ||
- !strcmp(gi, "H3") ||
- !strcmp(gi, "H4") ||
- !strcmp(gi, "H5") ||
- !strcmp(gi, "H6") ||
- !strcmp(gi, "PRE") ||
- !strcmp(gi, "XMP") ||
- !strcmp(gi, "LISTING") ||
- !strcmp(gi, "ADDRESS") ||
- !strcmp(gi, "BLOCKQUOTE") ||
- !strcmp(gi, "UL") ||
- !strcmp(gi, "OL") ||
- !strcmp(gi, "MENU") ||
- !strcmp(gi, "DIR") ||
- !strcmp(gi, "DL")
- )
- taglevel = ParaLine;
- else
- if(!strcmp(gi, "A") ||
- !strcmp(gi, "EM") ||
- !strcmp(gi, "TT") ||
- !strcmp(gi, "STRONG") ||
- !strcmp(gi, "B") ||
- !strcmp(gi, "I") ||
- !strcmp(gi, "U") ||
- !strcmp(gi, "CODE") ||
- !strcmp(gi, "SAMP") ||
- !strcmp(gi, "KBD") ||
- !strcmp(gi, "KEY") ||
- !strcmp(gi, "VAR") ||
- !strcmp(gi, "DFN") ||
- !strcmp(gi, "CITE"))
- taglevel = Font;
-
- while(1){
- switch(m->state){
- case MIFFile:
- if(!strcmp(gi, "BODY")){
- fprintf(m->out, "<TextFlow\n");
- STATE(m, TextFlow, 0, 1);
- return e->content = SGML_MIXED;
- }
-
- else if(!strcmp(gi, "HEAD")){
- return e->content = SGML_ELEMENT;
- }
-
- else if(!strcmp(gi, "TITLE")){
- fprintf(m->out,
- "<VariableFormats\n"
- " <VariableFormat\n"
- " <VariableName `Title'>\n"
- " <VariableDef `"
- );
-
- STATE(m, VariableDef, 0, 1);
- return e->content = SGML_RCDATA; /*@@ CDATA? */
- }
-
- else if(!strcmp(gi, "ISINDEX")){
- fprintf(m->out,
- "<VariableFormats\n"
- " <VariableFormat\n"
- " <VariableName `Index'>\n"
- " <VariableDef `True'>\n"
- " >\n"
- );
-
- STATE(m, VariableFormats, 0, 1);
- m->taglvl--;
- return SGML_EMPTY;
- }
-
- else if(taglevel == ParaLine || taglevel == Font){
- start_tag((HMDoc*)m, "BODY", 0, 0);
- }
-
- else{
- debug(("'%s' out of context in state %d", gi, m->state));
- m->taglvl--;
- return SGML_EMPTY;
- }
-
- break;
-
-
- case VariableFormats:
- if(!strcmp(gi, "TITLE")){
- fprintf(m->out,
- " <VariableFormat\n"
- " <VariableName `Title'>\n"
- " <VariableDef `"
- );
-
- STATE(m, VariableDef, 0, 1);
- return e->content = SGML_RCDATA; /*@@ CDATA? */
- }
-
- else if(!strcmp(gi, "ISINDEX")){
- fprintf(m->out,
- " <VariableFormat\n"
- " <VariableName `Index'>\n"
- " <VariableDef `True'>\n"
- " >\n"
- );
-
- m->taglvl--;
- return SGML_EMPTY;
- }
-
- else{
- fprintf(m->out,
- " > #End of VariableFormats\n");
- STATE(m, MIFFile, 0, 1);
- }
- break;
-
-
- case TextFlow:
- if(!strcmp(gi, "PRE")){
- fprintf(m->out,
- " <Para\n"
- " <PgfTag `%s'>\n"
- " <ParaLine\n"
- , gi);
- STATE(m, ParaLine, 1, 1);
- return e->content = SGML_MIXED;
- }
-
- else if(!strcmp(gi, "XMP") ||
- !strcmp(gi, "LISTING")){
- fprintf(m->out,
- " <Para\n"
- " <PgfTag `%s'>\n"
- " <ParaLine\n"
- , gi);
- STATE(m, ParaLine, 1, 1);
- return e->content = SGML_RCDATA;
- }
-
- else if(taglevel == ParaLine){
- fprintf(m->out,
- " <Para\n"
- " <PgfTag `%s'>\n"
- " <ParaLine\n"
- , gi);
-
- STATE(m, ParaLine, 0, 1);
- return e->content = SGML_MIXED;
- }
-
- else if(taglevel == Font){
- debug(("%s: transition from TextFlow to BODY ParaLine", gi));
-
- fprintf(m->out,
- " <Para\n"
- " <PgfTag `BODY'>\n"
- " <ParaLine\n");
-
- STATE(m, ParaLine, 0, 1);
- }
-
- else{
- debug(("'%s' out of context in state %d", gi, m->state));
- m->taglvl--;
- return SGML_EMPTY;
- }
-
- break;
-
- case ParaLine:
- if(!strcmp(gi, "A")){
- fprintf(m->out,
- " <Font\n"
- " <FTag `%s'>\n"
- " >\n", gi);
-
- marker(m, attributes, nattrs);
-
- STATE(m, Font, m->literal, 0);
- return e->content = SGML_MIXED;
- }
-
- else if(taglevel == Font){
- fprintf(m->out,
- " <Font\n"
- " <FTag `%s'>\n"
- " >\n"
- , gi);
-
- STATE(m, Font, m->literal, 0);
- return e->content = SGML_MIXED;
- }
-
- else if(!strcmp(gi, "P")){
- m->taglvl--;
- if(!m->empty)
- fprintf(m->out,
- " > # End ParaLine\n"
- " > # End Para\n");
- STATE(m, TextFlow, 0, 1);
- return SGML_EMPTY;
- }
-
- else if(!strcmp(gi, "DT") ||
- !strcmp(gi, "LI")){
- m->taglvl--;
- if(!m->empty)
- fprintf(m->out,
- " > # End ParaLine\n"
- " > # End Para\n"
- " <Para\n"
- " <ParaLine\n");
-
- m->empty = 1;
- m->needspace = 0;
- return SGML_EMPTY;
- }
-
- else if(!strcmp(gi, "DD")){
- fprintf(m->out,
- " <Char Tab>\n");
-
- m->taglvl--;
- return SGML_EMPTY;
- }
-
- else if(taglevel = ParaLine){
- debug(("'%s' start tag: back to TextFlow state\n", gi));
- fprintf(m->out,
- " > # End of ParaLine\n"
- " > # End of Para\n"
- );
- STATE(m, TextFlow, 0, 1);
- }
-
- else{
- debug(("'%s' out of context in state %d", gi, m->state));
- m->taglvl--;
- return SGML_EMPTY;
- }
-
- break;
-
- default:
- debug(("state %d unexpected (<%s>)\n", m->state, gi));
- m->taglvl--;
- return SGML_EMPTY;
- }
- }
- }
-
-
-
- static VOID
- end_tag(document, gi)
- HMDoc* document;
- CONST char* gi;
- {
- MIF* m = (MIF*)document;
- Element* e;
- int i;
-
- for(i = m->taglvl - 1; i>=0; i--){
- debug(("found </%s>. stack has %s\n", gi, m->stack[i].gi));
- if(m->stack[i].content == SGML_RCDATA ||
- m->stack[i].content == SGML_CDATA ||
- !strcmp(gi, m->stack[i].gi))
- break;
- }
-
- if(i < 0){
- debug(("Parse error: '%s' end tag with no such element open.\n", gi));
- return;
- }
-
- while(m->taglvl > i){
- m->taglvl--;
- switch(m->state){
- case VariableDef:
- fprintf(m->out,
- " > #End of VariableFormat\n");
- STATE(m, VariableFormats, 0, 1);
- break;
-
- case VariableFormats:
- fprintf(m->out,
- "> #End of VariableFormats\n");
- STATE(m, MIFFile, 0, 1);
- break;
-
- case TextFlow:
- fprintf(m->out,
- "> # End of TextFlow\n");
- STATE(m, MIFFile, 0, 1);
- break;
-
- case ParaLine:
- fprintf(m->out,
- " > # End of ParaLine\n"
- " > # End of Para\n");
- STATE(m, TextFlow, 0, 1);
- break;
-
- case Font:
- fprintf(m->out,
- " <Font\n"
- " <FTag `'>\n"
- " > # End of Font\n");
- STATE(m, ParaLine, m->literal, 0);
- break;
-
- default:
- debug(("'%s' end tag unexpected in state %d.", gi, m->state));
- }
- }
- }
-
-